import pandas as pd
import numpy as np
import seaborn as sns
from plotly.subplots import make_subplots
import plotly.express as px
import colorama
from colorama import Fore
import matplotlib.pyplot as plt
from termcolor import colored
Use the right arrow key for the next slide -->
# Load the pre-processed movie table.
df = pd.read_csv('movie_data_preprocessed.csv')

# Every genre label found in the comma-separated Genre column, then the
# distinct labels (set order is arbitrary).
l = [label.strip() for genres in df.Genre for label in genres.split(",")]
Available_Genres = list(set(l))
def genre_seach(s, df):
    """Return the movies that belong to every genre listed in ``s``.

    Args:
        s: Sequence of genre column names, e.g. ``['Drama', 'Crime']``. Any
            number of genres is accepted; an empty sequence matches every row.
        df: Movie table with one flag column per genre, and with ``Rating``
            and ``Title`` among its first nine columns.

    Returns:
        The first nine columns of the matching rows, sorted by ``Rating``
        (highest first) and indexed by ``Title``.

    Raises:
        KeyError: If a name in ``s`` is not a column of ``df``.
    """
    # Start from "everything matches" and narrow once per requested genre, so
    # the search is no longer limited to exactly one, two or three genres.
    mask = pd.Series(True, index=df.index)
    for genre in s:
        # Compare against True explicitly: a flag column may hold bools or 0/1.
        mask &= df[genre] == True  # noqa: E712
    data = df.iloc[:, :9].loc[mask].sort_values('Rating', ascending=False)
    return data.set_index('Title')
# Actor search: movies featuring any of the given actors.
def actor_search(df, s, n):
    """Return the movies in which at least one actor from ``s`` is billed.

    Args:
        df: Movie table with ``actor_1`` .. ``actor_4`` columns and with
            ``Rating`` among its first nine columns.
        s: Actor names, compared exactly against the four actor columns. The
            sequence is not modified and may hold any number of names.
        n: Pass ``1`` to also print the number of matching movies.

    Returns:
        The first nine columns of the matching rows, sorted by ``Rating``
        (highest first).
    """
    actor_columns = ['actor_1', 'actor_2', 'actor_3', 'actor_4']
    # One boolean row mask covers all four columns and any number of names.
    # It avoids padding the caller's list with a placeholder name and does
    # not rely on index labels being usable as positions.
    hits = df[actor_columns].isin(list(s)).any(axis=1)
    result = df.iloc[:, :9].loc[hits].sort_values('Rating', ascending=False)
    if n == 1:
        print('Total Number of Movies : ', len(result))
    return result
# Search by a combination of actors (all of them in the same movie).
def actor_combination(df, b):
    """Return the movies whose cast contains every actor named in ``b``.

    Args:
        df: Movie table with a comma-separated ``Stars`` column and with
            ``Rating`` and ``Title`` among its first nine columns. The frame
            is not modified.
        b: Actor names in title case (the form ``str.title`` produces), since
            each cast entry is title-cased before comparison.

    Returns:
        The first nine columns of the matching rows, sorted by ``Rating``
        (highest first) and indexed by ``Title``. The number of matches is
        printed as a side effect.
    """
    wanted = set(b)
    matches = []
    # enumerate() gives each row's own position, so two movies with an
    # identical cast string are both found, and a cast of any size works.
    for position, stars in enumerate(df.Stars):
        # Title-case a temporary copy only; the caller's Stars column keeps
        # its original spelling.
        cast = {name.strip() for name in stars.title().split(',')}
        if wanted <= cast:
            matches.append(position)
    print('Total Number of Movies : ', len(matches))
    return (
        df.iloc[:, :9]
        .iloc[matches]
        .sort_values('Rating', ascending=False)
        .set_index('Title')
    )
# Search movies by director.
def director_search(df, l):
    """Return the movies directed by every person named in ``l``.

    Names are compared case-insensitively and with surrounding whitespace
    ignored, so a query such as ``'Adam Mckay'`` matches ``'Adam McKay'``.

    Args:
        df: Movie table with a comma-separated ``Directors`` column and with
            ``Rating`` and ``Title`` among its first nine columns.
        l: Director names that must all appear on a movie for it to match.

    Returns:
        The first nine columns of the matching rows, sorted by ``Rating``
        (highest first) and indexed by ``Title``. The number of matches is
        printed as a side effect.
    """
    wanted = {name.strip().casefold() for name in l}
    matches = [
        position
        for position, directors in enumerate(df.Directors)
        if wanted <= {d.strip().casefold() for d in directors.split(',')}
    ]
    print('Total Number of Movies : ', len(matches))
    return (
        df.iloc[:, :9]
        .iloc[matches]
        .sort_values('Rating', ascending=False)
        .set_index('Title')
    )
# Movie count and mean rating per main director, computed in one grouped pass
# instead of re-filtering the whole table twice for every director.
# sort=False keeps directors in first-appearance order (the order unique()
# returns), so the sort below receives its rows in the same order as before.
# Rows without a Main_Directors value are left out, as groupby skips missing keys.
data = (
    df.groupby('Main_Directors', sort=False)['Rating']
    .agg(Movie_count='size', Avg_Rating='mean')
    .round({'Avg_Rating': 2})
    .rename_axis('Director')
    .reset_index()
    .sort_values('Movie_count', ascending=False)
)
# Number the rows 1..N so the index reads as a rank.
data.index = pd.Index(range(1, len(data) + 1))
print('\n')
str1 = '~'*15+' The Most busy Director '+'~'*15
print(Fore.CYAN + colored(str1, attrs=['bold']))
display(data.head(10))
~~~~~~~~~~~~~~~ The Most busy Director ~~~~~~~~~~~~~~~
| Director | Movie_count | Avg_Rating | |
|---|---|---|---|
| 1 | Ridley Scott | 13 | 6.89 |
| 2 | Steven Spielberg | 12 | 7.20 |
| 3 | Antoine Fuqua | 11 | 6.57 |
| 4 | Shawn Levy | 10 | 6.34 |
| 5 | Michael Bay | 10 | 6.31 |
| 6 | Clint Eastwood | 10 | 7.63 |
| 7 | Zack Snyder | 9 | 6.87 |
| 8 | Guy Ritchie | 9 | 7.13 |
| 9 | Tim Burton | 9 | 6.94 |
| 10 | Ron Howard | 8 | 6.99 |
The profit made by a movie is the difference between the amount it earned from sales and theatrical release and the amount spent on its production.
The tables below list the 10 movies with the biggest gains and the 10 movies with the biggest losses.
# Movies with the highest gains and losses (budget and income in millions).
# Only movies with a positive recorded income are considered.
# .copy() makes dfn an independent frame, so the column assignments below
# operate on dfn itself and cannot raise pandas' SettingWithCopyWarning.
dfn = df.loc[
    df['new_Income'] > 0,
    ['Title', 'Rating', 'Main_Directors', 'new_budget', 'new_Income'],
].copy()
dfn['new_budget'] = round(dfn['new_budget']/1000000, 2)
dfn['new_Income'] = round(dfn['new_Income']/1000000, 2)
dfn['Amount_Gained'] = dfn['new_Income'] - dfn['new_budget']

# a: biggest gains first; b: biggest losses first. Both are ranked from 1.
a = dfn.sort_values('Amount_Gained', ascending=False).reset_index(drop=True)
b = dfn.sort_values('Amount_Gained').reset_index(drop=True)
a.index += 1
b.index += 1

print('\n')
str1 = '~'*15+' Movies with biggest gains '+'~'*15
print(Fore.GREEN + colored(str1, attrs=['bold']))
display(a.head(10))
print('\n')
str1 = '~'*15+' Movies with biggest losses '+'~'*15
print(Fore.RED + colored(str1, attrs=['bold']))
display(b.head(10))
~~~~~~~~~~~~~~~ Movies with biggest gains ~~~~~~~~~~~~~~~
| Title | Rating | Main_Directors | new_budget | new_Income | Amount_Gained | |
|---|---|---|---|---|---|---|
| 1 | Avatar | 7.8 | James Cameron | 237.0 | 2922.92 | 2685.92 |
| 2 | Avengers: Endgame | 8.4 | Anthony Russo | 356.0 | 2797.50 | 2441.50 |
| 3 | Avatar: The Way of Water | 7.8 | James Cameron | 350.0 | 2267.95 | 1917.95 |
| 4 | Star Wars: Episode VII - The Force Awakens | 7.8 | J J Abrams | 245.0 | 2069.52 | 1824.52 |
| 5 | Avengers: Infinity War | 8.4 | Anthony Russo | 321.0 | 2048.36 | 1727.36 |
| 6 | Spider-Man: No Way Home | 8.3 | Jon Watts | 200.0 | 1917.43 | 1717.43 |
| 7 | Jurassic World | 6.9 | Colin Trevorrow | 150.0 | 1671.54 | 1521.54 |
| 8 | The Lion King | 6.8 | Jon Favreau | 260.0 | 1663.25 | 1403.25 |
| 9 | Furious 7 | 7.1 | James Wan | 190.0 | 1515.34 | 1325.34 |
| 10 | Top Gun: Maverick | 8.4 | Joseph Kosinski | 170.0 | 1488.73 | 1318.73 |
~~~~~~~~~~~~~~~ Movies with biggest losses ~~~~~~~~~~~~~~~
| Title | Rating | Main_Directors | new_budget | new_Income | Amount_Gained | |
|---|---|---|---|---|---|---|
| 1 | The Gray Man | 6.5 | Anthony Russo | 200.0 | 0.45 | -199.55 |
| 2 | The Tomorrow War | 6.5 | Chris McKay | 200.0 | 14.40 | -185.60 |
| 3 | Red Notice | 6.3 | Rawson Marshall Thurber | 160.0 | 0.18 | -159.82 |
| 4 | The Irishman | 7.8 | Martin Scorsese | 159.0 | 0.97 | -158.03 |
| 5 | Pinocchio 2 | 5.1 | Robert Zemeckis | 150.0 | 0.04 | -149.96 |
| 6 | Mulan | 5.7 | Niki Caro | 200.0 | 69.97 | -130.03 |
| 7 | White Noise | 6.6 | Noah Baumbach | 80.0 | 0.07 | -79.93 |
| 8 | Don't Look Up | 7.2 | Adam McKay | 75.0 | 0.79 | -74.21 |
| 9 | How Do You Know | 5.4 | James L Brooks | 120.0 | 48.67 | -71.33 |
| 10 | Devotion | 7.0 | J D Dillard | 90.0 | 19.95 | -70.05 |
# Total number of movies per genre, taken from the flag columns at
# positions 15-33 and drawn as a bar chart (largest first).
dfn = df.iloc[:, 15:34].astype(int)
genre_totals = dfn.sum()
dfn = (
    pd.DataFrame({'Genre': genre_totals.index, 'Count': genre_totals.values})
    .sort_values('Count', ascending=False)
    .reset_index(drop=True)
)
import matplotlib.pyplot as plt
import seaborn as sns
fig, ax = plt.subplots(figsize=(10, 5))
sns.color_palette("tab10")
my_plot = sns.barplot(x=dfn['Genre'], y=dfn['Count'])
# Tilt the genre names so they do not overlap.
my_plot.set_xticklabels(my_plot.get_xticklabels(), rotation=45)
# Write each bar's count above it.
ax.bar_label(ax.containers[0])
my_plot.set(xlabel=None)
my_plot.set_title('Movies in current decade per Genre')
Text(0.5, 1.0, 'Movies in current decade per Genre')
The graphs, together with the lists underneath, make it easier to pick one of the longest movies or one of the shortest ones.
# Bar charts of the five longest and five shortest movies (Runtime > 0 only).
ok = df.iloc[:, :9].query("Runtime > 0").sort_values('Runtime', ascending=False)
bar_style = dict(y='Runtime', x='Title', color='Genre', text='Title')
fig1 = px.bar(ok.head(5), title="Movies by Runtime", **bar_style)
fig2 = px.bar(ok.tail(5), **bar_style)
# Titles are already printed on the bars, so the x axis is hidden.
for figure in (fig1, fig2):
    figure.update_xaxes(visible=False)
fig1.show()
fig2.show()

# The same two selections as tables, each ordered by rating.
first_nine = df.iloc[:, :9]
longest = first_nine.sort_values('Runtime', ascending=False).head(5)
print('\n')
str1 = '~'*45+' Top Loooonng Movies '+'~'*45
print(Fore.RED + colored(str1, attrs=['bold']))
display(longest.sort_values('Rating', ascending=False))

shortest = first_nine.query("Runtime > 0").sort_values('Runtime').head(5)
print('\n')
str1 = '~'*45+' The Shortest Movies '+'~'*45
print(Fore.GREEN + colored(str1, attrs=['bold']))
display(shortest.sort_values('Rating', ascending=False))
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Top Loooonng Movies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| Title | Rating | Year | Month | Certificate | Runtime | Directors | Stars | Genre | |
|---|---|---|---|---|---|---|---|---|---|
| 1903 | The Lord of the Rings: The Return of the King | 9.0 | 2003 | December | PG-13 | 201 | Peter Jackson | Elijah Wood, Viggo Mortensen, Ian McKellen, Or... | Action, Adventure, Drama |
| 123 | Zack Snyder's Justice League | 8.0 | 2021 | March | R | 242 | Zack Snyder | Henry Cavill, Ben Affleck, Gal Gadot, Amy Adams | Action, Adventure, Fantasy |
| 315 | The Irishman | 7.8 | 2019 | November | R | 209 | Martin Scorsese | Robert De Niro, Al Pacino, Joe Pesci, Harvey K... | Biography, Crime, Drama |
| 0 | Avatar: The Way of Water | 7.8 | 2022 | December | PG-13 | 192 | James Cameron | Sam Worthington, Zoe Saldana, Sigourney Weaver... | Action, Adventure, Fantasy |
| 1564 | Grindhouse | 7.5 | 2007 | April | R | 191 | Robert Rodriguez, Eli Roth, Quentin Tarantino,... | Kurt Russell, Rose McGowan, Danny Trejo, Zo Bell | Action, Horror, Thriller |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The Shortest Movies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| Title | Rating | Year | Month | Certificate | Runtime | Directors | Stars | Genre | |
|---|---|---|---|---|---|---|---|---|---|
| 995 | Banshee | 8.5 | 2013 | September | Unrated | 50 | Gemma Mc Carthy | Jonathan O Dwyer, Sean Flood, Frank Hurley, Fi... | Action |
| 477 | The VelociPastor | 5.0 | 2018 | August | Unrated | 75 | Brendan Steere | Greg Cohan, George Schewnzer, Janice Young, Da... | Action, Adventure, Comedy |
| 1831 | 9 Songs | 4.8 | 2004 | March | Unrated | 71 | Michael Winterbottom | Kieran O Brien, Margo Stilley, Robert Levon Be... | Drama, Music, Romance |
| 1454 | Superhero Movie | 4.6 | 2008 | March | PG-13 | 75 | Craig Mazin | Drake Bell, Leslie Nielsen, Sara Paxton, Chris... | Action, Comedy, Sci-Fi |
| 578 | Picture of Beauty | 3.4 | 2017 | April | Unrated | 70 | Maxim Ford | Taylor Sands, Danielle Rose, Pawel Hajnos, Mag... | Drama, Romance |
import squarify
# Treemap of the ten most frequent filming locations, ignoring 'Unknown'.
known_locations = df.loc[df['Filming_location'] != 'Unknown', 'Filming_location']
a = known_locations.value_counts().head(10)
plt.figure(figsize=(12, 5))
squarify.plot(
    sizes=a.values.tolist(),
    label=a.index.tolist(),
    value=a.values.tolist(),
    color=[plt.cm.Set2(i) for i in range(7)],
    text_kwargs={'fontsize': 13.8},
)
plt.title('TOP 10 Country Praduction', fontsize=20, fontweight='bold')
plt.axis('off')
plt.show()
Ever wondered which month has the highest number of releases, and how the other months compare?
This graph helps you understand that.
# Number of movies released in each calendar month, as a horizontal bar chart.
# The months are ordered by name rather than by hard-coded positions in the
# value_counts() result: the old position list repeated one entry and skipped
# another, and it silently depended on the frequency ranking of the data.
# NOTE(review): calendar order is presumed to be the intended layout - confirm.
MONTH_ORDER = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
# fill_value=0 keeps a month on the chart even if no movie was released in it.
month_counts = df.Month.value_counts().reindex(MONTH_ORDER, fill_value=0)
data = pd.DataFrame({'Month': month_counts.index, 'count': month_counts.values})
fig, ax = plt.subplots(figsize=(12, 5))
my_plot = sns.barplot(y=data['Month'], x=data['count'])
my_plot.bar_label(ax.containers[0])
my_plot.set(xlabel=None, ylabel=None)
my_plot.set_title('Movies praduced montly around the decade')
Text(0.5, 1.0, 'Movies praduced montly around the decade')
from tabulate import tabulate
def print_tabular(ok, c1, c2):
    """Print a two-column text table built from a counted Series.

    Args:
        ok: Series whose index entries are indexable (the first element of
            each entry is shown), such as the result of
            ``DataFrame.value_counts``.
        c1: Header for the label column.
        c2: Header for the value column.

    Returns:
        ``None`` (the result of ``print``).
    """
    labels = [entry[0] for entry in ok.index]
    values = list(ok)
    table = tabulate(zip(labels, values), headers=[c1, c2])
    return print(table)
# One entry per billed name across all movies, then the ten most frequent names.
l = [name.strip() for cast in df.Stars for name in cast.split(',')]
dfn = pd.DataFrame(l)
print('\n')
str1 = '~'*10+' Top 10 Busy Stars of the Decade '+'~'*10
print(Fore.RED + colored(str1, attrs=['bold']))
ok = dfn.value_counts().head(10)
print_tabular(ok, "Actor", "Movie_Count")
~~~~~~~~~~ Top 10 Busy Stars of the Decade ~~~~~~~~~~
Actor Movie_Count
------------------ -------------
Dwayne Johnson 25
Mark Wahlberg 24
Ryan Reynolds 23
Brad Pitt 22
Amy Adams 22
Matt Damon 22
Scarlett Johansson 22
Liam Neeson 21
Tom Hanks 21
Cate Blanchett 21
# Ten movies with the highest origin_count, with the budget shown in millions.
dfn = (
    df[['Title', 'Rating', 'new_budget', 'Filming_location', 'origin_count']]
    .sort_values('origin_count', ascending=False)
    .head(10)
    .reset_index(drop=True)
)
dfn['Budget_Million'] = dfn['new_budget']/1000000
print('\n')
str1 = '~'*40+' Top Multi-Origin Movies '+'~'*40
print(Fore.BLUE + colored(str1, attrs=['bold']))
# Final expression of the cell: shown without the raw budget column.
dfn.drop('new_budget', axis=1)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Top Multi-Origin Movies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| Title | Rating | Filming_location | origin_count | Budget_Million | |
|---|---|---|---|---|---|
| 0 | Triangle of Sadness | 7.6 | Greece | 10 | 10.70 |
| 1 | Dogville | 8.0 | Sweden | 9 | 10.00 |
| 2 | Assassin's Creed | 5.6 | Spain | 8 | 125.00 |
| 3 | Valerian and the City of a Thousand Planets | 6.4 | France | 7 | 177.20 |
| 4 | The Hitman's Bodyguard | 6.9 | The Netherlands | 7 | 30.00 |
| 5 | Kingdom of Heaven | 7.2 | Morocco | 7 | 130.00 |
| 6 | Mr. Nobody | 7.8 | Canada | 7 | 47.00 |
| 7 | Brimstone | 7.0 | Hungary | 7 | 12.84 |
| 8 | Nymphomaniac: Vol. I | 6.9 | Germany | 7 | 4.70 |
| 9 | Valkyrie | 7.1 | Germany | 7 | 75.00 |
# Animation movies ranked by rating, with income breaking ties between equally
# rated titles. A single multi-key sort is used because two chained
# sort_values() calls do not keep the first ordering: the default sort is not
# stable, so the earlier income sort was effectively discarded.
dfn = (
    df.loc[df['Animation'] == True]
    .sort_values(['Rating', 'new_Income'], ascending=[False, False])
    .reset_index(drop=True)
)
print('\n')
str1 = '~'*50+' Top 12 Animation Movies '+'~'*50
print(Fore.YELLOW + colored(str1, attrs=['bold']))
dfn.iloc[:, :9].head(12)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Top 12 Animation Movies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| Title | Rating | Year | Month | Certificate | Runtime | Directors | Stars | Genre | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | WALL·E | 8.4 | 2008 | June | G | 98 | Andrew Stanton | Ben Burtt, Elissa Knight, Jeff Garlin, Fred Wi... | Animation, Adventure, Family |
| 1 | Coco | 8.4 | 2017 | November | PG | 105 | Lee Unkrich, Adrian Molina | Anthony Gonzalez, Gael Garc a Bernal, Benjamin... | Animation, Adventure, Comedy |
| 2 | Your Name. | 8.4 | 2016 | April | PG | 106 | Makoto Shinkai | Ry nosuke Kamiki, Mone Kamishiraishi, Ry Narit... | Animation, Drama, Fantasy |
| 3 | Spider-Man: Into the Spider-Verse | 8.4 | 2018 | December | PG | 117 | Bob Persichetti, Peter Ramsey, Rodney Rothman | Shameik Moore, Jake Johnson, Hailee Steinfeld,... | Animation, Action, Adventure |
| 4 | Toy Story 3 | 8.3 | 2010 | June | G | 103 | Lee Unkrich | Tom Hanks, Tim Allen, Joan Cusack, Ned Beatty | Animation, Adventure, Comedy |
| 5 | Up | 8.3 | 2009 | May | PG | 96 | Pete Docter, Bob Peterson | Edward Asner, Jordan Nagai, John Ratzenberger,... | Animation, Adventure, Comedy |
| 6 | Howl's Moving Castle | 8.2 | 2004 | June | PG | 119 | Hayao Miyazaki | Chieko Baish , Takuya Kimura, Tatsuya Gash in,... | Animation, Adventure, Family |
| 7 | Demon Slayer the Movie: Mugen Train | 8.2 | 2020 | April | R | 117 | Haruo Sotozaki | Natsuki Hanae, Akari Kit , Yoshitsugu Matsuoka... | Animation, Action, Adventure |
| 8 | Finding Nemo | 8.2 | 2003 | May | G | 100 | Andrew Stanton, Lee Unkrich | Albert Brooks, Ellen DeGeneres, Alexander Goul... | Animation, Adventure, Comedy |
| 9 | Inside Out | 8.2 | 2015 | June | PG | 95 | Pete Docter, Ronnie Del Carmen | Amy Poehler, Bill Hader, Lewis Black, Mindy Ka... | Animation, Adventure, Comedy |
| 10 | How to Train Your Dragon | 8.1 | 2010 | March | PG | 98 | Dean DeBlois, Chris Sanders | Jay Baruchel, Gerard Butler, Christopher Mintz... | Animation, Action, Adventure |
| 11 | A Silent Voice: The Movie | 8.1 | 2016 | September | Unrated | 130 | Naoko Yamada | Miyu Irino, Saori Hayami, Aoi Y ki, Kensh Ono | Animation, Drama |
# Step 1: one row per (billed name, movie), carrying that movie's rating,
# genre string and budget.
star_rows = [
    (name.strip(), rating, genre, budget)
    for cast, rating, genre, budget in zip(df.Stars, df.Rating, df.Genre, df.new_budget)
    for name in cast.split(',')
]
ns = pd.DataFrame(star_rows, columns=['Star', 'Rating', 'Genre', 'Budget'])

# Step 2: collapse to one row per star - mean rating, the list of genre
# strings, and mean budget in millions. sort=False keeps first-appearance order.
names, mean_ratings, genre_lists, mean_budgets = [], [], [], []
for name, rows in ns.groupby('Star', sort=False):
    names.append(name)
    mean_ratings.append(round(rows['Rating'].mean(), 2))
    genre_lists.append(rows['Genre'].tolist())
    mean_budgets.append(round(rows['Budget'].mean()/1000000, 2))
ns1 = pd.DataFrame(
    zip(names, mean_ratings, genre_lists, mean_budgets),
    columns=['Star', 'Rating', 'Genre', 'Budget'],
)

# Step 3: distinct genres per star and how many there are.
all_genres, genre_counts = [], []
for genre_strings in ns1['Genre']:
    distinct = {
        label.strip()
        for genre_string in genre_strings
        for label in genre_string.split(',')
    }
    all_genres.append(list(distinct))
    genre_counts.append(len(distinct))

# Step 4: number of movies actor_search finds for each star on its own.
movie_counts = [len(actor_search(df, [star], 0)) for star in ns1.Star]

ns1['All_Genre'] = all_genres
ns1['Genre_Count'] = genre_counts
ns1['Movie_Count'] = movie_counts
del ns
# The raw per-movie genre strings are no longer needed once All_Genre exists.
ns1.drop('Genre', axis=1, inplace=True)
display(ns1.shape)
(3502, 6)
# Display the image file 'actor_award.gif' as the cell's output.
from IPython.display import Image
Image(filename='actor_award.gif')
<IPython.core.display.Image object>
# Display the image file 'R.png' as the cell's output.
from IPython.display import Image
Image(filename='R.png')

from pandas import option_context

# Each entry: banner colour, banner caption, source rows, column to rank by,
# and how many top rows to show.
highlight_sections = [
    (Fore.MAGENTA, ' Actors Trusted with High Budget ', ns1, 'Budget', 5),
    (Fore.CYAN, ' Most Varsatile Actor ', ns1, 'Genre_Count', 1),
    (Fore.RED, ' Actor with maax Movies ', ns1, 'Movie_Count', 1),
    # Only stars with more than 10 movies compete on average rating.
    (Fore.GREEN, ' Most consistent Performance ', ns1.loc[ns1['Movie_Count'] > 10], 'Rating', 1),
]

# max_colwidth=None stops pandas from truncating the All_Genre lists.
with option_context('display.max_colwidth', None):
    print("Budget in Million $")
    for colour, caption, rows, rank_by, top_n in highlight_sections:
        print('\n')
        banner = '~'*45 + caption + '~'*45
        print(colour + colored(banner, attrs=['bold']))
        best = rows.sort_values(by=rank_by, ascending=False).head(top_n)
        # Transposed so each star becomes a column.
        display(best.set_index("Star").T)
Budget in Million $
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Actors Trusted with High Budget ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| Star | Mark Hamill | Carrie Fisher | Daisy Ridley | Brandon Routh | Donald Glover |
|---|---|---|---|---|---|
| Rating | 6.9 | 6.9 | 7.07 | 6.1 | 6.85 |
| Budget | 317.0 | 317.0 | 279.0 | 270.0 | 267.5 |
| All_Genre | [Adventure, Action, Fantasy] | [Adventure, Action, Fantasy] | [Adventure, Sci-Fi, Action, Fantasy] | [Adventure, Action, Sci-Fi] | [Sci-Fi, Adventure, Action, Drama, Animation] |
| Genre_Count | 3 | 3 | 4 | 3 | 5 |
| Movie_Count | 1 | 1 | 3 | 1 | 2 |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Most Varsatile Actor ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| Star | Samuel L Jackson |
|---|---|
| Rating | 6.87 |
| Budget | 78.24 |
| All_Genre | [Crime, Thriller, Sci-Fi, Biography, Adventure, Sport, Family, Horror, Action, Mystery, Fantasy, Music, Comedy, Drama, Animation] |
| Genre_Count | 15 |
| Movie_Count | 21 |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Actor with maax Movies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| Star | Dwayne Johnson |
|---|---|
| Rating | 6.46 |
| Budget | 113.64 |
| All_Genre | [Thriller, Crime, Sci-Fi, Biography, Adventure, Horror, Action, Fantasy, Comedy, Drama, Animation] |
| Genre_Count | 11 |
| Movie_Count | 25 |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Most consistent Performance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| Star | Leonardo DiCaprio |
|---|---|
| Rating | 7.84 |
| Budget | 96.15 |
| All_Genre | [Crime, Thriller, Romance, Sci-Fi, Biography, Adventure, Western, Action, Mystery, Comedy, Drama] |
| Genre_Count | 11 |
| Movie_Count | 13 |
from pandas import option_context

# Five stars with the highest mean budget, one star per column.
top_budget = ns1.sort_values(by='Budget', ascending=False).head(5)
# max_colwidth=None stops pandas from truncating the All_Genre lists.
with option_context('display.max_colwidth', None):
    print("Budget in Million $")
    print('\n')
    str2 = '~'*15+' Actors Trusted with High Budget '+'~'*15
    print(Fore.MAGENTA + colored(str2, attrs=['bold']))
    display(top_budget.set_index("Star").T)
Budget in Million $
~~~~~~~~~~~~~~~ Actors Trusted with High Budget ~~~~~~~~~~~~~~~
| Star | Mark Hamill | Carrie Fisher | Daisy Ridley | Brandon Routh | Donald Glover |
|---|---|---|---|---|---|
| Rating | 6.9 | 6.9 | 7.07 | 6.1 | 6.85 |
| Budget | 317.0 | 317.0 | 279.0 | 270.0 | 267.5 |
| All_Genre | [Adventure, Action, Fantasy] | [Adventure, Action, Fantasy] | [Adventure, Sci-Fi, Action, Fantasy] | [Adventure, Action, Sci-Fi] | [Sci-Fi, Adventure, Action, Drama, Animation] |
| Genre_Count | 3 | 3 | 4 | 3 | 5 |
| Movie_Count | 1 | 1 | 3 | 1 | 2 |
# Best average rating among stars with more than 10 movies.
print('\n')
str4 = '~'*15+' Most consistent Performance '+'~'*15
print(Fore.GREEN + colored(str4, attrs=['bold']))
frequent_stars = ns1.loc[ns1['Movie_Count'] > 10]
best_rated = frequent_stars.sort_values(by='Rating', ascending=False).head(1)
display(best_rated.set_index("Star").T)
~~~~~~~~~~~~~~~ Most consistent Performance ~~~~~~~~~~~~~~~
| Star | Leonardo DiCaprio |
|---|---|
| Rating | 7.84 |
| Budget | 96.15 |
| All_Genre | [Crime, Thriller, Romance, Sci-Fi, Biography, ... |
| Genre_Count | 11 |
| Movie_Count | 13 |
# Build an Image object for the file 'gains.png'.
from IPython.display import Image
Image(filename='gains.png')
# Income against budget (both in millions) for every movie.
y = df['new_Income']/1000000
z = df['new_budget']/1000000
# Colour each point by the first genre listed for the movie.
c = [i.split(',')[0] for i in df['Genre']]
fig = px.scatter(x=z, y=y, title='Gains over Budget comparision of all the movies ', color=c,
                 width=750, height=600)
fig.update_layout(
    xaxis_title="Budget",
    yaxis_title="Income",
    legend_title="Genres",)
import seaborn as sns
plt.figure(figsize=(8, 6))
# No hue variable is assigned here, so seaborn would ignore a palette and emit
# a UserWarning; the palette argument is therefore left out.
sns.lineplot(x=z, y=y)
fig.show()
C:\Users\rishi\AppData\Local\Temp\ipykernel_15572\682156227.py:19: UserWarning: Ignoring `palette` because no `hue` variable has been assigned.
sns.lineplot(x=z,y=y)
plt.figure(figsize=(8,6))
<Figure size 800x600 with 0 Axes>
<Figure size 800x600 with 0 Axes>
# Convenience wrappers that make the search functions easier to call.
def search_(s):
    """Split a comma-separated query into stripped, title-cased terms.

    Args:
        s: Query text such as ``'drama, crime'``.

    Returns:
        One entry per comma-separated piece, e.g. ``['Drama', 'Crime']``.
    """
    return [piece.strip().title() for piece in s.split(',')]
def search_in_genre(s):
    """Look up movies by a comma-separated genre query such as ``'drama, crime'``.

    The query is normalised with ``search_`` and passed to ``genre_seach``
    together with the module-level ``df``.
    """
    return genre_seach(search_(s), df)
def search_by_actor(s):
    """Look up movies featuring all actors in a comma-separated query.

    The query is normalised with ``search_`` and passed to
    ``actor_combination`` together with the module-level ``df``.
    """
    return actor_combination(df, search_(s))
def search_by_director(s):
    """Look up movies directed by all people in a comma-separated query.

    The query is normalised with ``search_`` and passed to
    ``director_search`` together with the module-level ``df``, so the
    ``Directors`` column is searched rather than the cast.
    """
    return director_search(df, search_(s))
Each search returns a table sorted by rating, highest first.
# Numbered grid of every distinct genre label found in the Genre column.
l = [label.strip() for genres in df.Genre for label in genres.split(",")]
Available_Genres = list(set(l))
ind = list(range(1, len(Available_Genres) + 1))
genre_rows = zip(ind, Available_Genres)
print(tabulate(genre_rows, headers=['S.No.', 'List of Available Genres'], tablefmt="grid"))
+---------+----------------------------+ | S.No. | List of Available Genres | +=========+============================+ | 1 | Thriller | +---------+----------------------------+ | 2 | Romance | +---------+----------------------------+ | 3 | Biography | +---------+----------------------------+ | 4 | Family | +---------+----------------------------+ | 5 | Animation | +---------+----------------------------+ | 6 | History | +---------+----------------------------+ | 7 | Musical | +---------+----------------------------+ | 8 | Drama | +---------+----------------------------+ | 9 | Crime | +---------+----------------------------+ | 10 | War | +---------+----------------------------+ | 11 | Sci-Fi | +---------+----------------------------+ | 12 | Adventure | +---------+----------------------------+ | 13 | Action | +---------+----------------------------+ | 14 | Music | +---------+----------------------------+ | 15 | Fantasy | +---------+----------------------------+ | 16 | Sport | +---------+----------------------------+ | 17 | Horror | +---------+----------------------------+ | 18 | Western | +---------+----------------------------+ | 19 | Mystery | +---------+----------------------------+ | 20 | Comedy | +---------+----------------------------+
# Example queries: a single-genre search, then a single-actor search.
search_in_genre('drama')
search_by_actor('Leonardo Dicaprio')
Total Number of Movies : 13
| Rating | Year | Month | Certificate | Runtime | Directors | Stars | Genre | |
|---|---|---|---|---|---|---|---|---|
| Title | ||||||||
| Inception | 8.8 | 2010 | July | PG-13 | 148 | Christopher Nolan | Leonardo DiCaprio, Joseph Gordon Levitt, Ellio... | Action, Adventure, Sci-Fi |
| The Departed | 8.5 | 2006 | October | R | 151 | Martin Scorsese | Leonardo DiCaprio, Matt Damon, Jack Nicholson,... | Crime, Drama, Thriller |
| Django Unchained | 8.4 | 2012 | December | R | 165 | Quentin Tarantino | Jamie Foxx, Christoph Waltz, Leonardo DiCaprio... | Drama, Western |
| The Wolf of Wall Street | 8.2 | 2013 | December | R | 180 | Martin Scorsese | Leonardo DiCaprio, Jonah Hill, Margot Robbie, ... | Biography, Comedy, Crime |
| Shutter Island | 8.2 | 2010 | February | R | 138 | Martin Scorsese | Leonardo DiCaprio, Emily Mortimer, Mark Ruffal... | Mystery, Thriller |
| The Revenant | 8.0 | 2015 | January | R | 156 | Alejandro G I rritu | Leonardo DiCaprio, Tom Hardy, Will Poulter, Do... | Action, Adventure, Drama |
| Blood Diamond | 8.0 | 2006 | December | R | 143 | Edward Zwick | Leonardo DiCaprio, Djimon Hounsou, Jennifer Co... | Adventure, Drama, Thriller |
| Once Upon a Time in Hollywood | 7.6 | 2019 | July | R | 161 | Quentin Tarantino | Leonardo DiCaprio, Brad Pitt, Margot Robbie, E... | Comedy, Drama |
| The Aviator | 7.5 | 2004 | December | PG-13 | 170 | Martin Scorsese | Leonardo DiCaprio, Cate Blanchett, Kate Beckin... | Biography, Drama |
| Revolutionary Road | 7.3 | 2008 | January | R | 119 | Sam Mendes | Leonardo DiCaprio, Kate Winslet, Christopher F... | Drama, Romance |
| Don't Look Up | 7.2 | 2021 | December | R | 138 | Adam McKay | Leonardo DiCaprio, Jennifer Lawrence, Meryl St... | Comedy, Drama, Sci-Fi |
| The Great Gatsby | 7.2 | 2013 | May | PG-13 | 143 | Baz Luhrmann | Leonardo DiCaprio, Carey Mulligan, Joel Edgert... | Drama, Romance |
| Body of Lies | 7.0 | 2008 | October | R | 128 | Ridley Scott | Leonardo DiCaprio, Russell Crowe, Mark Strong,... | Action, Drama, Thriller |
# Example query using the director helper.
search_by_director('Sean Penn')
Total Number of Movies : 5
| Rating | Year | Month | Certificate | Runtime | Directors | Stars | Genre | |
|---|---|---|---|---|---|---|---|---|
| Title | ||||||||
| Mystic River | 7.9 | 2003 | October | R | 138 | Clint Eastwood | Sean Penn, Tim Robbins, Kevin Bacon, Emmy Rossum | Crime, Drama, Mystery |
| 21 Grams | 7.6 | 2003 | January | R | 124 | Alejandro G I rritu | Sean Penn, Benicio Del Toro, Naomi Watts, Dann... | Crime, Drama, Thriller |
| Licorice Pizza | 7.2 | 2021 | December | R | 133 | Paul Thomas Anderson | Alana Haim, Cooper Hoffman, Sean Penn, Tom Waits | Comedy, Drama, Romance |
| The Tree of Life | 6.8 | 2011 | May | PG-13 | 139 | Terrence Malick | Brad Pitt, Sean Penn, Jessica Chastain, Hunter... | Drama, Fantasy |
| Gangster Squad | 6.7 | 2013 | January | R | 113 | Ruben Fleischer | Sean Penn, Ryan Gosling, Emma Stone, Giovanni ... | Action, Crime, Drama |
# Example query: movies tagged with both genres.
search_in_genre('adventure, biography')
| Rating | Year | Month | Certificate | Runtime | Directors | Stars | Genre | |
|---|---|---|---|---|---|---|---|---|
| Title | ||||||||
| Into the Wild | 8.1 | 2007 | October | R | 148 | Sean Penn | Emile Hirsch, Vince Vaughn, Catherine Keener, ... | Adventure, Biography, Drama |
| Papillon | 7.2 | 2017 | August | R | 133 | Michael Noer | Charlie Hunnam, Damijan Oklopdzic, Christopher... | Adventure, Biography, Crime |
| Everest | 7.1 | 2015 | September | PG-13 | 121 | Baltasar Korm kur | Jason Clarke, Ang Phula Sherpa, Thomas M Wrigh... | Action, Adventure, Biography |
| Wild | 7.1 | 2014 | December | R | 115 | Jean Marc Vall e | Reese Witherspoon, Laura Dern, Gaby Hoffmann, ... | Adventure, Biography, Drama |
| In the Heart of the Sea | 6.9 | 2015 | December | PG-13 | 122 | Ron Howard | Chris Hemsworth, Cillian Murphy, Brendan Glees... | Action, Adventure, Biography |
| The Lost City of Z | 6.6 | 2016 | April | PG-13 | 141 | James Gray | Charlie Hunnam, Robert Pattinson, Sienna Mille... | Adventure, Biography, Drama |
# Example query: movies in which both actors appear together.
search_by_actor('Leonardo Dicaprio, Christoph Waltz')
Total Number of Movies : 1
| Rating | Year | Month | Certificate | Runtime | Directors | Stars | Genre | |
|---|---|---|---|---|---|---|---|---|
| Title | ||||||||
| Django Unchained | 8.4 | 2012 | December | R | 165 | Quentin Tarantino | Jamie Foxx, Christoph Waltz, Leonardo Dicaprio... | Drama, Western |